###############################################################
###################### REPLICATION PAPER ###################### 
###############################################################
############### Ranjit Lall and Peter Volberding ##############
###############################################################

################# Data ################# 

## Data file saved as "data.dta" on my computer 
data<-read.dta("data.dta")
head(data)
dim(data)
names(data)

cbind(data$year, data$trend)


################# Replication of Table 2 ################# 
z.out.1 <- zelig(mefp ~ lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data)
summary(z.out.1)

z.out.2 <- zelig(mefp ~ lnewplp + ltenpolity2 + iltenpolity2_lnewplp + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data)
summary(z.out.2)

z.out.3 <- zelig(arr ~ lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data)
summary(z.out.3)

z.out.4 <- zelig(arr ~ lnewplp + ltenpolity2 + iltenpolity2_lnewplp + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data)
summary(z.out.4)


#### TABLE FORMATTING ####

### coefficients ###
table2 <- cbind(c(z.out.1$coefficients[1:3], NA, z.out.1$coefficients[4:10]), z.out.2$coefficients, c(z.out.3$coefficients[1:3], NA, z.out.3$coefficients[4:10]), z.out.4$coefficients)
table2.1 <- rbind(table2, c(log(z.out.1$theta), log(z.out.2$theta), log(z.out.3$theta), log(z.out.4$theta)))
table2.2 <- rbind(table2.1, c(257, 257, 257, 257))
table2.2

library(xtable)
xtable(table2.2)


####### STANDARD ERRORS TABLE 2 #######
### cluster for Models 1-4 ###
z.out.cluster1 <- zelig(mefp ~ lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend + ccode, model = "negbin", robust=T, data = data)
cluster1234 <- as.data.frame(model.matrix(z.out.cluster1))$ccode

### formula for robust clustered SEs ###

clx <-
function(fm, dfc, cluster){
 library(sandwich)
 library(lmtest)
 M <- length(unique(cluster))
 N <- length(cluster)
 # dfc <- ((N-M)/(N-fm$rank))
 dfc <- 1
 u <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum))
 vcovCL <- dfc*sandwich(fm, meat=crossprod(u)/(N))
 coeftest(fm, vcovCL)
 }

### SEs with formula ###
se1 <- clx(z.out.1, cluster=cluster1234)
se1[,2]

m<-length(unique(cluster1234))
n <- length(cluster1234)
dfc <- (n-m)/(n-z.out.1$rank)

se2 <- clx(z.out.2, cluster=cluster1234)
se2[,2]

se3 <- clx(z.out.3, cluster=cluster1234)
se3[,2]

se4 <- clx(z.out.4, cluster=cluster1234)
se4[,2]

table2se <- cbind(c(se1[1:3,2], NA, se1[4:10,2]), se2[,2], c(se3[1:3,2], NA, se3[4:10,2]), se4[,2])
library(xtable)
xtable(table2se)


################# Replication for Table 3 ################# 
data
data2 <- data[data$ltenpolity2 >10,]
dim(data2)

z.out.5 <- zelig(mefp ~ lnewplp + lexecleft + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data2)
summary(z.out.5)

z.out.6 <- zelig(mefp ~ lnewplp + lexecleft + ilexecleft_lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data2)
summary(z.out.6)

z.out.7 <- zelig(arr ~ lnewplp + lexecleft + ltenpolity2+ lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data2)
summary(z.out.7)

z.out.8 <- zelig(arr ~ lnewplp + lexecleft + ilexecleft_lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data2)
summary(z.out.8)


#### TABLE FORMATTING ####

### coefficients ###
table3 <- round(cbind(c(z.out.5$coefficients[1:3],0,z.out.5$coefficients[4:11]), z.out.6$coefficients, c(z.out.7$coefficients[1:3],0,z.out.7$coefficients[4:11]), z.out.8$coefficients),3)
table3.1 <- rbind(table3, c(log(z.out.5$theta), log(z.out.6$theta), log(z.out.7$theta), log(z.out.8$theta)))
table3.2 <- rbind(table3.1, c(257, 257, 257, 257))
table3.2

xtable(table3.2)


####### STANDARD ERRORS TABLE 2 #######
### cluster for Models 5-8 ###
z.out.cluster2 <- zelig(mefp ~ lnewplp + lexecleft + ilexecleft_lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend + ccode, model = "negbin", robust=T, data = data2)
cluster5678 <- as.data.frame(model.matrix(z.out.cluster2))$ccode
length(cluster5678)


### SEs with formula ###
se5 <- clx(z.out.5, cluster=cluster5678)
se5[,2]

se6 <- clx(z.out.6, cluster=cluster5678)
se6[,2]

se7 <- clx(z.out.7, cluster=cluster5678)
se7[,2]

se8 <- clx(z.out.8, cluster=cluster5678)
se8[,2]


table3se <- cbind(c(se5[1:3,2], NA, se5[4:11,2]), se6[,2], c(se7[1:3,2], NA, se7[4:11,2]), se8[,2])
library(xtable)
xtable(table3se)



###################################################
################## Lagged variables ###############
###################################################

lag <- read.csv("lagged.csv")
lag[,c(3,4)]
na1 <- c(NA, NA)
na1
lag2 <- rbind(na1,lag[,c(3,4)])
data.lag <- cbind(data,lag2)
head(data.lag)


### Model 1 w/o data imputation ###

# Removed llngdppc 
### Note: removing trend eliminates lnewplp significance
lag.z.out.1 <- zelig(mefp ~ lnewplp + ltenpolity2 + lfiring + linline + llngdpconstant + ldebt + docs + trend + X, model = "negbin", data = data.lag)
summary(lag.z.out.1)  # revised model
summary(z.out.1)    # original model 


# Removed llngdppc 
### Note: removing trend eliminates interaction and llndgdpconstant significance 
lag.z.out.2 <- zelig(mefp ~ lnewplp + ltenpolity2 + iltenpolity2_lnewplp + lfiring + linline + llngdpconstant + ldebt + docs + X, model = "negbin", data = data.lag)
summary(lag.z.out.2)
summary(z.out.2)

# Removed llngdppc
lag.z.out.3 <- zelig(arr ~ lnewplp + ltenpolity2 + lfiring + linline + llngdpconstant + ldebt + trend + docs + X.1, model = "negbin", data = data.lag)
summary(lag.z.out.3)
summary(z.out.3)

# Removed llngdppc
lag.z.out.4 <- zelig(arr ~ lnewplp + ltenpolity2 + iltenpolity2_lnewplp + lfiring + linline + llngdpconstant + ldebt + trend + X.1, model = "negbin", data = data.lag)
summary(lag.z.out.4)
summary(z.out.4)

# Results: The effects of adding just a lagged DV are also effective


######################################
############### AMELIA ###############
######################################


### Removing interaction terms
data.amelia <- data[,-c(7, 16)]

### Adding lagged DVs and 
head(data.amelia)
data.amelia2 <- cbind(data.amelia, lag2)
head(data.amelia2)

# install.packages("Amelia")
library(Amelia)

### Setting bounds for lnewplp, lfiring, ldebt
bounds <- matrix(c(5, 0, 20, 7, 0,500, 11, 0, 300), nrow=3, ncol=3, byrow=T)

### Imputation 
set.seed(02138)
a.out <- amelia(data.amelia2, m = 5, idvars = c("X", "X.1", "trend", "mefp"), ts = "year", cs = "ccode", ords = "ltenpolity2", noms="lexecleft", bounds=bounds, max.resample=1000)



###### New Models & Results for Table 2 ######


# MODEL 1
z.out.1 <- zelig(mefp ~ lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data)

a.out.model1 <- zelig(mefp ~ lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = a.out$imputations)

a.out.model1.new <- zelig(mefp ~ lnewplp + ltenpolity2 + lfiring + linline + llngdppc + ldebt + docs + trend + X, model = "negbin", data = a.out$imputations)


summary(z.out.1)           # original model
summary(a.out.model1)      # original model + imputed data
summary(a.out.model1.new)  # new model + imputed data



# MODEL 2
z.out.2 <- zelig(mefp ~ lnewplp + ltenpolity2 + iltenpolity2_lnewplp + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data)

a.out.model2 <- zelig(mefp ~ lnewplp + ltenpolity2 + ltenpolity2:lnewplp + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = a.out$imputations)

a.out.model2.new <- zelig(mefp ~ lnewplp + ltenpolity2 + lnewplp:ltenpolity2 + lfiring + linline + llngdppc + ldebt + docs + trend + X, model = "negbin", data = a.out$imputations)


summary(z.out.2)           # original model
summary(a.out.model2)      # original model + imputed data
summary(a.out.model2.new)  # new model + imputed data




# MODEL 3
z.out.3 <- zelig(arr ~ lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data)

a.out.model3 <- zelig(arr ~ lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = a.out$imputations)

a.out.model3.new <- zelig(arr ~ lnewplp + ltenpolity2 + lfiring + linline + llngdpconstant + llngdppc + ldebt + trend + docs + X.1, model = "negbin", data = a.out$imputations)


summary(z.out.3)           # original model
summary(a.out.model3)      # original model + imputed data
summary(a.out.model3.new)  # new model + imputed data



# MODEL 4
z.out.4 <- zelig(arr ~ lnewplp + ltenpolity2 + iltenpolity2_lnewplp + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data)

a.out.model4 <- zelig(arr ~ lnewplp + ltenpolity2 + lnewplp:ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = a.out$imputations)

a.out.model4.new <- zelig(arr ~ lnewplp + ltenpolity2 + lnewplp:ltenpolity2 + lfiring + linline + llngdpconstant + ldebt + docs + trend + X.1, model = "negbin", data = a.out$imputations)


summary(z.out.4)           # original model
summary(a.out.model4)      # original model + imputed data
summary(a.out.model4.new)  # new model + imputed data





# Code to find the number of included observations

obs.test <- a.out$imputations[[1]]
head(obs.test)
obs.test1 <- obs.test[,1:13]
head(obs.test1)
obs.test2 <- na.omit(obs.test1)
dim(obs.test2)



###### Zero Inflated Poisson for Models 3 & 4 ######
library(pscl)

zero.model3 <- zeroinfl(arr ~ lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, dist="negbin", data = data)

zero.model3.new <- zeroinfl(arr ~ lnewplp + ltenpolity2 + lfiring + linline + llngdpconstant + ldebt + docs + trend + X.1, dist="negbin", data = a.out$imputations[[1]])

summary(z.out.3) 			# original
summary(zero.model3)        # original + zero inflated
summary(zero.model3.new)    # new + zero + imputed data


zero.model4 <- zeroinfl(arr ~ lnewplp + ltenpolity2 + iltenpolity2_lnewplp + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, dist="negbin", data = data)

zero.model4.new <- zeroinfl(arr ~ lnewplp + ltenpolity2 + ltenpolity2:lnewplp + lfiring + linline + llngdpconstant + ldebt + docs + trend + X.1, dist="negbin", data = a.out$imputations[[1]])

summary(z.out.4)		   # original
summary(zero.model4)       # original + zero inflated
summary(zero.model4.new)   # new + zero inflated + imputed



######### Imputing Data for Table 3 ######### 

### Adding the lagged DV
data.amelia.table3 <- cbind(data, lag2)

### Removing interaction terms
data.amelia.table3.1 <- data.amelia.table3[,-c(7, 16)]
head(data.amelia.table3.1)

### Only putting in democratic countries (where ltenpolity2>10)
data.amelia3 <- data.amelia.table3.1[data.amelia.table3.1$ltenpolity2 >10,]
dim(data3)

### Verify number of observations equals original paper (113 observations)
obs.test<-na.omit(data.amelia3[,1:14])
dim(obs.test)

head(data.amelia3)
### Setting bounds for lnewplp, lfiring, ldebt
bounds <- matrix(c(5, 0, 20, 7, 0,500, 11, 0, 300), nrow=3, ncol=3, byrow=T)

### Imputation 
set.seed(02138)
a.out.2 <- amelia(data.amelia3, m = 5, idvars = c("X", "X.1", "trend", "mefp", "arr"), ts = "year", cs = "ccode", ords = "ltenpolity2", noms="lexecleft", bounds=bounds, max.resample=1000)

head(a.out.2$imputations[[1]])
data.amelia3

###### New Models & Results for Table 3 ######

# MODEL 5
z.out.5 <- zelig(mefp ~ lnewplp + lexecleft + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data2)

a.out.model5 <- zelig(mefp ~ lnewplp + lexecleft + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = a.out.2$imputations)

a.out.model5.new <- zelig(mefp ~ lnewplp + lexecleft + ltenpolity2 + lfiring + linline + llngdpconstant + ldebt + docs + trend + X, model = "negbin", data = a.out.2$imputations)


summary(z.out.5)           # original model
summary(a.out.model5)      # original model + imputed data
summary(a.out.model5.new)  # new model + imputed data



# MODEL 6
z.out.6 <- zelig(mefp ~ lnewplp + lexecleft + ilexecleft_lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data2)

a.out.model6 <- zelig(mefp ~ lnewplp + lexecleft + lexecleft:lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = a.out.2$imputations)

a.out.model6.new <- zelig(mefp ~ lnewplp + lexecleft +  lexecleft:lnewplp + ltenpolity2 + lfiring + linline + llngdpconstant + ldebt + docs + trend + X, model = "negbin", data = a.out.2$imputations)


summary(z.out.6)           # original model
summary(a.out.model6)      # original model + imputed data
summary(a.out.model6.new)  # new model + imputed data



# MODEL 7
z.out.7 <- zelig(arr ~ lnewplp + lexecleft + ltenpolity2+ lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data2)

a.out.model7 <- zelig(arr ~ lnewplp + lexecleft + ltenpolity2+ lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = a.out.2$imputations)

a.out.model7.new <- zelig(arr ~ lnewplp + ltenpolity2 + lfiring + linline + llngdpconstant + ldebt + trend + docs + X.1, model = "negbin", data = a.out.2$imputations)


summary(z.out.7)           # original model
summary(a.out.model7)      # original model + imputed data
summary(a.out.model7.new)  # new model + imputed data



# MODEL 8
z.out.8 <- zelig(arr ~ lnewplp + lexecleft + ilexecleft_lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = data2)
summary(z.out.8)

a.out.model8 <- zelig(arr ~ lnewplp + lexecleft + lexecleft:lnewplp + ltenpolity2 + lfiring + linline + llngdppc + llngdpconstant + ldebt + docs + trend, model = "negbin", data = a.out.2$imputations)

a.out.model8.new <- zelig(arr ~ lnewplp + lexecleft + lexecleft:lnewplp + ltenpolity2 + lfiring + linline + llngdpconstant + ldebt + docs + trend + X.1, model = "negbin", data = a.out.2$imputations)

summary(z.out.8)           # original model
summary(a.out.model8)      # original model + imputed data
summary(a.out.model8.new)  # new model + imputed data


######################################
############ Diagnostics #############
######################################


######### Testing the necessity of log GDP and log GDP per capita

# Plot to visually see correlation
plot(data$llngdpconstant, data$llngdppc, xlab= "Log GDP", ylab = "Log GDP per capita", main="GDP vs. GDP per capita", color=data$ccode)

library(ggplot2)
qplot(llngdpconstant, llngdppc, data=data, xlab= "Log GDP", ylab = "Log GDP per capita", main="GDP vs. GDP per capita", color=ccode)


# Function to obtain median correlation between GDP and GDP per capita clustered by country

r2 <- NULL
for(i in 1:length(unique(data$ccode))){
	newdat <- subset(data, data$ccode==unique(data$ccode)[i])	
	if(dim(na.omit(newdat[,c("llngdpconstant", "llngdppc")]))>1){
	lm.out <- lm(newdat$llngdpconstant ~ newdat$llngdppc)
	r2[i] <- summary(lm.out)$r.squared}
}
median(na.omit(r2))



######## Extra Amelia diagnostics
## Distribution of observed (black) vs. imputed data (red)
plot(a.out, which.vars=5:12, cex.main=0.9)
head(a.out$iterHist)

# Data seems to be relatively well imputed. 


## How much of each variable is missing
missmap(a.out)


###### Testing of residuals with and without time trend when lagged DV added  

a.out.model1_trend <- zelig(mefp ~ lnewplp + ltenpolity2 + lfiring + linline + llngdpconstant + ldebt + docs + trend + X, model = "negbin", data = a.out$imputations[[1]])

a.out.model1_notrend <- zelig(mefp ~ lnewplp + ltenpolity2 + lfiring + linline + llngdpconstant + ldebt + docs + X, model = "negbin", data = a.out$imputations[[1]])

par(mfrow=c(1,2))
plot(model.matrix(a.out.model1_trend)[,9], residuals(a.out.model1_notrend), main="No Trend Term", ylab="Residuals", xlab="Time (years)")
abline(lm(residuals(a.out.model1_notrend) ~ model.matrix(a.out.model1_trend)[,9]), col="red", lwd=3)
plot(model.matrix(a.out.model1_trend)[,9], residuals(a.out.model1_trend), main="Trend Term", ylab="Residuals", xlab="Time (years)")
abline(lm(residuals(a.out.model1_trend) ~ model.matrix(a.out.model1_trend)[,9]), col="red", lwd=3)


######## Testing the difference between incluced and omitted data distributions

### We especially want to see how this affects labor power, which alone has 629 omissions
# Subset data
plp.na<-data[is.na(data$lnewplp),]
plp.data <- data[!is.na(data$lnewplp),]
dim(plp.na)
dim(plp.data)

data.test <- data[,-c(4, 7, 15, 16)]
head(data.test)

reg.data <- na.omit(data.test)
dim(reg.data)
summary(reg.data)

reg.data.2 <- !na.omit(data.test)
summary(reg.data.2)

# plot of the three densities
par(mfrow=c(1,3))
plot(density(na.omit(plp.data$llngdpconstant)), ylim=c(0,0.3), main="Log GDP", xlab="")
lines(density(na.omit(plp.na$llngdpconstant)), col="red")

plot(density(na.omit(plp.data$llngdppc)), ylim=c(0,0.5), main="Log GDP Per Capita", xlab="")
lines(density(na.omit(plp.na$llngdppc)), col="red")
plot(density(na.omit(plp.data$ltenpolity2)), ylim=c(0,0.09), main="Polity Scores", xlab="")
lines(density(na.omit(plp.na$ltenpolity2)), col="red")


## Median, means, and standard deviations of included and omitted data

length(unique(data$ccode))
length(unique(plp.na$ccode))


## UN voting
mean(na.omit(plp.na$linline))
mean(na.omit(plp.data$linline))

## Polity IV Scores
mean(na.omit(plp.na$ltenpolity2))
mean(na.omit(plp.data$ltenpolity2))
sd(na.omit(plp.na$ltenpolity2))
sd(na.omit(plp.data$ltenpolity2))
median(na.omit(plp.na$ltenpolity2))
median(na.omit(plp.data$ltenpolity2))

# Histogram of data
par(mfrow=c(1,2))
hist(na.omit(plp.data$ltenpolity2),20, ylim=c(0,120), main="Included Data" )
hist(na.omit(plp.na$ltenpolity2),20, ylim=c(0,120), main="NA Data")

## GDP per Capita
mean(na.omit(plp.na$llngdppc))
mean(na.omit(plp.data$llngdppc))
sd(na.omit(plp.na$llngdppc))
sd(na.omit(plp.data$llngdppc))
median(na.omit(plp.na$llngdppc))
median(na.omit(plp.data$llngdppc))

## GDP
mean(na.omit(plp.na$llngdpconstant))
mean(na.omit(plp.data$llngdpconstant))
sd(na.omit(plp.na$llngdpconstant))
sd(na.omit(plp.data$llngdpconstant))
median(na.omit(plp.na$llngdpconstant))
median(na.omit(plp.data$llngdpconstant))